library(Seurat)
library(tidyverse) # dplyr and ggplot2
# List files
files <- list.files(path = "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data", recursive = T, pattern = "*.h5")
files
## [1] "D10_filtered_feature_bc_matrix.h5" "D16_filtered_feature_bc_matrix.h5"
## [3] "D20_filtered_feature_bc_matrix.h5" "D26_filtered_feature_bc_matrix.h5"
## [5] "W10_filtered_feature_bc_matrix.h5" "W16_filtered_feature_bc_matrix.h5"
## [7] "W20_filtered_feature_bc_matrix.h5" "W26_filtered_feature_bc_matrix.h5"
# Create a list of count matrices
h5_read <- lapply(paste0(
"C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data/",
files
), Read10X_h5)
# Automated way to assign names; modify for your purposes
# names(h5_read)<- sapply(files,
# function(x){str_split_1(x,"_")[1]},
# USE.NAMES = FALSE)
# Assign names manually
names(h5_read) <- c("D10", "D16", "D20", "D26", "W10", "W16", "W20", "W26")
#########################################################################
# Single sample
## W10 <- CreateSeuratObject(counts=W10, project="W10", min.cells = 3, min.features = 200)
# View W10
## W10
#########################################################################
# All samples
adp <- mapply(CreateSeuratObject,
counts = h5_read,
project = names(h5_read),
MoreArgs = list(min.cells = 3, min.features = 200)
)
# View adp
adp
## $D10
## An object of class Seurat
## 19084 features across 7623 samples within 1 assay
## Active assay: RNA (19084 features, 0 variable features)
## 1 layer present: counts
##
## $D16
## An object of class Seurat
## 16943 features across 5990 samples within 1 assay
## Active assay: RNA (16943 features, 0 variable features)
## 1 layer present: counts
##
## $D20
## An object of class Seurat
## 18242 features across 5093 samples within 1 assay
## Active assay: RNA (18242 features, 0 variable features)
## 1 layer present: counts
##
## $D26
## An object of class Seurat
## 17751 features across 7436 samples within 1 assay
## Active assay: RNA (17751 features, 0 variable features)
## 1 layer present: counts
##
## $W10
## An object of class Seurat
## 19059 features across 7382 samples within 1 assay
## Active assay: RNA (19059 features, 0 variable features)
## 1 layer present: counts
##
## $W16
## An object of class Seurat
## 17024 features across 5465 samples within 1 assay
## Active assay: RNA (17024 features, 0 variable features)
## 1 layer present: counts
##
## $W20
## An object of class Seurat
## 18354 features across 5427 samples within 1 assay
## Active assay: RNA (18354 features, 0 variable features)
## 1 layer present: counts
##
## $W26
## An object of class Seurat
## 17373 features across 6013 samples within 1 assay
## Active assay: RNA (17373 features, 0 variable features)
## 1 layer present: counts
# remove the original sparse matrices
rm(h5_read)
adp <- merge(adp[[1]],
y = adp[2:length(adp)],
add.cell.ids = names(adp), project = "Adipose"
)
#############################################################################################################
# If you want to apply QC metrics independently for each sample, you can use this for
# loop to create an individual object for each sample.
## for (file in files){
## seurat_data <- Read10X_h5(paste0("C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/R project dowloaded from manual/GettingStarted_scRNASeq/data/",
## file))
## seurat_obj <- CreateSeuratObject(counts = seurat_data,
## min.features = 200,
## min.cells = 3,
## project = file)
## assign(file, seurat_obj)
## }
# You can see the primary slots using:
## glimpse(W10_filtered_feature_bc_matrix.h5)
# to go back to the names layer
## Layers(W10_filtered_feature_bc_matrix.h5)
## W10_filtered_feature_bc_matrix.h5[["RNA"]]$counts |> head()
# or
## W10_filtered_feature_bc_matrix.h5@assays$RNA$counts |> head()
# or
## LayerData(W10_filtered_feature_bc_matrix.h5, assay="RNA", layer='counts') |> head()
# The metadata in the Seurat object is located in adp@metadata and contains the
# information associated with each cell.
################################################################################
head(adp@meta.data) # using head to return only the first 6 rows
## orig.ident nCount_RNA nFeature_RNA
## D10_AAACCCAAGATGCTTC-1 D10 11188 3383
## D10_AAACCCAAGCTCTTCC-1 D10 32832 5823
## D10_AAACCCAAGTCATCGT-1 D10 28515 5002
## D10_AAACCCAGTAGCGTCC-1 D10 18954 4305
## D10_AAACCCAGTGCACGCT-1 D10 27181 4586
## D10_AAACCCAGTGTAAATG-1 D10 3090 1030
# Access a single column.
head(adp$orig.ident)
## D10_AAACCCAAGATGCTTC-1 D10_AAACCCAAGCTCTTCC-1 D10_AAACCCAAGTCATCGT-1
## "D10" "D10" "D10"
## D10_AAACCCAGTAGCGTCC-1 D10_AAACCCAGTGCACGCT-1 D10_AAACCCAGTGTAAATG-1
## "D10" "D10" "D10"
# or
head(adp[["orig.ident"]])
## orig.ident
## D10_AAACCCAAGATGCTTC-1 D10
## D10_AAACCCAAGCTCTTCC-1 D10
## D10_AAACCCAAGTCATCGT-1 D10
## D10_AAACCCAGTAGCGTCC-1 D10
## D10_AAACCCAGTGCACGCT-1 D10
## D10_AAACCCAGTGTAAATG-1 D10
# acess multiple columns
# Access multiple columns, rows.
head(adp[[c("orig.ident", "nCount_RNA")]])[1:3, ]
## orig.ident nCount_RNA
## D10_AAACCCAAGATGCTTC-1 D10 11188
## D10_AAACCCAAGCTCTTCC-1 D10 32832
## D10_AAACCCAAGTCATCGT-1 D10 28515
# to save the Seurat object
saveRDS(adp, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merged.rds")
library(Seurat) # Seurat toolkit
library(hdf5r) # for data import
library(patchwork) # for plotting
library(glmGamPoi) # for sctransform
# read the object
## adp <- readRDS("C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merged.rds")
glimpse(adp)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
## ..@ assays :List of 1
## .. ..$ RNA:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
## ..@ meta.data :'data.frame': 50429 obs. of 3 variables:
## .. ..$ orig.ident : chr [1:50429] "D10" "D10" "D10" "D10" ...
## .. ..$ nCount_RNA : num [1:50429] 11188 32832 28515 18954 27181 ...
## .. ..$ nFeature_RNA: int [1:50429] 3383 5823 5002 4305 4586 1030 5015 3927 4006 5489 ...
## ..@ active.assay: chr "RNA"
## ..@ active.ident: Factor w/ 8 levels "D10","D16","D20",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "names")= chr [1:50429] "D10_AAACCCAAGATGCTTC-1" "D10_AAACCCAAGCTCTTCC-1" "D10_AAACCCAAGTCATCGT-1" "D10_AAACCCAGTAGCGTCC-1" ...
## ..@ graphs : list()
## ..@ neighbors : list()
## ..@ reductions : list()
## ..@ images : list()
## ..@ project.name: chr "Adipose"
## ..@ misc : list()
## ..@ version :Classes 'package_version', 'numeric_version' hidden list of 1
## .. ..$ : int [1:3] 5 1 0
## ..@ commands : list()
## ..@ tools : list()
# calculate meitocondril genes porcentage for quality control and normalization
adp[["percent.mt"]] <- PercentageFeatureSet(adp, pattern = "^mt-")
# set colors
cnames <- setNames(rep(c("cyan3", "darkgoldenrod1"), each = 4), levels(factor(adp@meta.data$orig.ident)))
cnames
## D10 D16 D20 D26
## "cyan3" "cyan3" "cyan3" "cyan3"
## W10 W16 W20 W26
## "darkgoldenrod1" "darkgoldenrod1" "darkgoldenrod1" "darkgoldenrod1"
# plot total counts per sample
VlnPlot(adp, features = "nCount_RNA", layer = "counts", group.by = "orig.ident", raster = FALSE, alpha = 0.2) +
scale_fill_manual(values = cnames)

# or using ggplot2
adp@meta.data %>%
ggplot(aes(color = orig.ident, x = nCount_RNA, fill = orig.ident)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = 650, color = "red", linetype = "dotted")

# determine number of features (genes)
VlnPlot(adp, features = "nFeature_RNA", group.by = "orig.ident") +
scale_fill_manual(values = cnames)

# visualize % mitocondrial genes
VlnPlot(adp, features = "percent.mt", group.by = "orig.ident") +
scale_fill_manual(values = cnames) +
geom_hline(yintercept = 10, color = "red")

# add time point metadata day 0 or day 6
adp$time_point <- ifelse(stringr::str_detect(adp@meta.data$orig.ident, "0"),
"Day 0", "Day 6"
)
adp$condition <- ifelse(stringr::str_detect(adp@meta.data$orig.ident, "^W"),
"WT", "DKO"
)
# a condition and time point merged metadata
adp$condition_tp <- paste(adp$condition, adp$time_point)
# scatter the point by number of features and mitocndrial genes
FeatureScatter(adp, feature1 = "percent.mt", feature2 = "nFeature_RNA", group.by = "orig.ident", split.by = "time_point")

# and by number of fetures and total counts
FeatureScatter(adp, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident", split.by = "time_point", log = TRUE)

# filter
# Set one set of parameters for Day 0 samples;
# keep the rownames (Cell barcodes)
t0 <- adp@meta.data |>
filter(
time_point == "Day 0", nFeature_RNA > 350,
nCount_RNA > 650, percent.mt < 10
) |>
tibble::rownames_to_column("Cell") |>
pull(Cell)
# Set an alternative set of thresholds for Day 6 samples;
# keep the rownames (Cell barcodes)
t6 <- adp@meta.data |>
filter(
time_point == "Day 6", nFeature_RNA > 350,
nCount_RNA > 650, percent.mt < 25
) |>
tibble::rownames_to_column("Cell") |>
pull(Cell)
keep <- c(t0, t6)
# use different parameters; established above
adp_filt <- subset(adp, cells = keep)
# save file after filtering
saveRDS(adp_filt, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt.rds")
# run sctransform
adp_filt <- SCTransform(adp_filt, vars.to.regress = "percent.mt", verbose = FALSE)
# Check default assay
############ DefaultAssay(object = adp_filt)
# Set default assay
########### DefaultAssay(object = adp_filt) <- "RNA"
# run PCA
adp_filt <- RunPCA(adp_filt, verbose = FALSE, assay = "SCT")
# visualizethte first 9 PC
DimHeatmap(adp_filt, dims = 1:9, cells = 500, balanced = TRUE, ncol = 3)

# perform the elbow analisys
ElbowPlot(adp_filt, ndims = 40)

# performing the neighbors analisys to prepara for the clusterin
adp_filt <- FindNeighbors(adp_filt, dims = 1:30)
# calculate the clusterin (suggessted redolution 0.4-1.2)
adp_filt <- FindClusters(adp_filt, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 42114
## Number of edges: 1478117
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9569
## Number of communities: 8
## Elapsed time: 10 seconds
# UMAP for the viasualization of the clusters
adp_filt <- RunUMAP(adp_filt, dims = 1:30)
DimPlot(adp_filt,
reduction = "pca", group.by = c("orig.ident", "seurat_clusters", "time_point", "condition", "condition_tp"),
alpha = 0.2, ncol = 2
)

DimPlot(adp_filt,
reduction = "umap", group.by = c("orig.ident", "seurat_clusters", "time_point", "condition", "condition_tp"),
alpha = 0.2, ncol = 2
)

# save the adp filtered file
saveRDS(adp_filt, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt_sctran_clust0.1.rds")
# prepare the SCT data for the search of markers among clusters
adp_filt <- PrepSCTFindMarkers(adp_filt, verbose = T)
# For differential expression by finding markers (many method available, default a Wilcox rank sum. Look up test.use parameters for options)
# requires presto installation to speed up the next step
# devtools::install_github('immunogenomics/presto')
library(presto)
# find all markers
adp_filt_markers <- FindAllMarkers(adp_filt, only.pos = TRUE)
# ordering the results
adp_filt_markers <- adp_filt_markers %>%
arrange(cluster, desc(avg_log2FC), desc(p_val_adj))
# examine a small subset
adp_filt_markers %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
## # A tibble: 41 × 7
## # Groups: cluster [8]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 2.41e- 49 4.48 0.01 0.001 4.61e- 45 0 Erv3
## 2 0 4.27 0.163 0.011 0 0 Mlana
## 3 1.67e-144 4.00 0.032 0.002 3.20e-140 0 Pdlim3
## 4 0 3.88 0.11 0.008 0 0 Mamdc2
## 5 9.70e-146 3.72 0.033 0.002 1.85e-141 0 Nat8f3
## 6 0 3.61 0.261 0.042 0 1 Cxcl13
## 7 4.17e- 75 3.39 0.016 0.001 7.96e- 71 1 Dlx3
## 8 1.85e- 86 3.38 0.019 0.002 3.53e- 82 1 Hcn4
## 9 0 3.29 0.389 0.049 0 1 Serpina3m
## 10 9.23e- 40 3.19 0.01 0.001 1.76e- 35 1 Cstdc4
## # ℹ 31 more rows
# markers visualization
top20 <- adp_filt_markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 20) %>%
ungroup()
DoHeatmap(adp_filt, features = top20$gene) + NoLegend()

# associate pubblished markers with cell type
# contaminants
contam <- c("Ptprc", "Plp1", "Pecam1")
# beige adipocytes
beige <- c("Ucp1", "Ppargc1a", "Elovl3", "Cidea")
# preadipocytes
preadip <- c("Mmp3", "Cd142", "Itgb1")
# proliferating
prolif <- "Mki67"
# differentiating adipocytes
diffadip <- c("Col5a3", "Serpina3n")
VlnPlot(adp_filt, features = contam)

# visualize in the cluster the cel type corresponding to the feature= parameter above (this case contam)
FeaturePlot(adp_filt, features = contam)

saveRDS(adp_filt_markers, "C:/Users/Owner/Documents/github/Seurat test script/seurat 2 basic script/outputs/adp_merge_filt_markers.rds")
DefaultAssay(adp_filt)
## [1] "SCT"
mem.maxVSize(vsize = 15000)
## [1] Inf
glimpse(adp_filt)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
## ..@ assays :List of 2
## .. ..$ RNA:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
## .. ..$ SCT:Formal class 'SCTAssay' [package "Seurat"] with 9 slots
## ..@ meta.data :'data.frame': 42114 obs. of 11 variables:
## .. ..$ orig.ident : chr [1:42114] "D10" "D10" "D10" "D10" ...
## .. ..$ nCount_RNA : num [1:42114] 11188 32832 28515 18954 27181 ...
## .. ..$ nFeature_RNA : int [1:42114] 3383 5823 5002 4305 4586 5015 3927 4006 5489 4093 ...
## .. ..$ percent.mt : num [1:42114] 3.16 1.8 1.39 1.89 1.33 ...
## .. ..$ time_point : chr [1:42114] "Day 0" "Day 0" "Day 0" "Day 0" ...
## .. ..$ condition : chr [1:42114] "DKO" "DKO" "DKO" "DKO" ...
## .. ..$ condition_tp : chr [1:42114] "DKO Day 0" "DKO Day 0" "DKO Day 0" "DKO Day 0" ...
## .. ..$ nCount_SCT : num [1:42114] 22992 25464 25285 23590 25170 ...
## .. ..$ nFeature_SCT : int [1:42114] 3790 5742 4974 4280 4552 4956 3904 4005 5363 4077 ...
## .. ..$ SCT_snn_res.0.1: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
## .. ..$ seurat_clusters: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
## ..@ active.assay: chr "SCT"
## ..@ active.ident: Factor w/ 8 levels "0","1","2","3",..: 8 1 1 1 1 1 8 1 3 7 ...
## .. ..- attr(*, "names")= chr [1:42114] "D10_AAACCCAAGATGCTTC-1" "D10_AAACCCAAGCTCTTCC-1" "D10_AAACCCAAGTCATCGT-1" "D10_AAACCCAGTAGCGTCC-1" ...
## ..@ graphs :List of 2
## .. ..$ SCT_nn :Formal class 'Graph' [package "SeuratObject"] with 7 slots
## .. ..$ SCT_snn:Formal class 'Graph' [package "SeuratObject"] with 7 slots
## ..@ neighbors : list()
## ..@ reductions :List of 2
## .. ..$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
## .. ..$ umap:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
## ..@ images : list()
## ..@ project.name: chr "Adipose"
## ..@ misc : list()
## ..@ version :Classes 'package_version', 'numeric_version' hidden list of 1
## .. ..$ : int [1:3] 5 1 0
## ..@ commands :List of 5
## .. ..$ SCTransform.RNA :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## .. ..$ RunPCA.SCT :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## .. ..$ FindNeighbors.SCT.pca:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## .. ..$ FindClusters :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## .. ..$ RunUMAP.SCT.pca :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## ..@ tools : list()
table(adp_filt$condition_tp)
##
## DKO Day 0 DKO Day 6 WT Day 0 WT Day 6
## 11655 10512 11807 8140
Idents(adp_filt) <- "SCT_snn_res.0.1"
table(Idents(adp_filt))
##
## 0 1 2 3 4 5 6 7
## 17062 7893 6691 6323 2251 979 557 358
######################################################################################
# Createa new metadata with the expression of gene (example CD3D) ##
# ##
## gene_name <- "CD3D" ##
# ##
# Extract gene expression values from the SCT assay ##
## seurat_obj[[gene_name]] <- FetchData(seurat_obj, vars = gene_name, assay = "SCT")##
######################################################################################
# Viewing the code, JoinLayers was called to link all the data sets together. This is
# a new feature of Seurat5, and is required for analyzing data after integration and batch correction. In this case fuse all the samples (D10,D16 , etc.)
# scRNA distributions are usually no normal. For this reason Student T test is usually not applied as it assume normality.
#instead Wilcos rank sum is preferes.
#A continuation the GAPDH distribution is analized to show the non-normal shape of the distribution tipical in scRNA.
plot(density(sample(JoinLayers(adp_filt@assays$RNA)$count["Gapdh", ], 2500)), cex = 0, lty = 1, main = "Density of Gapdh in 2500 random cells")

hist(sample(JoinLayers(adp@assays$RNA)$count["Gapdh", ], 2500), breaks = 99, main = "Histogram of Gapdh in 2500 random cells", ylab = "Frequency", xlab = "Gene counts")

###############################################################################
## how estract expression matrixes ##
## expr<-GetAssayData(adp_filt,assay = "SCT",slot = "data") ##
## expr_df <- as.data.frame(as.matrix(expr)) %>% ##
## rownames_to_column(var = "gene") %>% ##
## pivot_longer(-gene, names_to = "cell", values_to = "expression") ##
###############################################################################
adp_pp <- adp_filt
Idents(adp_pp) <- "SCT"
adp_pp_mks <- PrepSCTFindMarkers(adp_pp)
Idents(adp_pp_mks) <- "time_point"
table(Idents(adp_pp_mks))
##
## Day 0 Day 6
## 23462 18652
## to find different expression between a label and the others in a metadata idents (if between two specific labels "idents.2=" can be used to specify.)
Day0_Day6_DE <- FindMarkers(adp_pp_mks, ident.1 = "Day 0", test.use = "wilcox", min.pct = 0.01, logfc.threshold = 0.1)
Idents(adp_pp_mks) <- "SCT_snn_res.0.1"
DefaultAssay(adp_pp_mks) <- "SCT"
## find all markers between clusters
de_allClusters <- FindAllMarkers(adp_pp_mks, test.use = "wilcox", min.pct = 0.1, only.pos = TRUE)
head(de_allClusters)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Emp3 0 1.547753 0.882 0.276 0 0 Emp3
## Tagln 0 1.761800 0.869 0.285 0 0 Tagln
## Mfap5 0 1.983247 0.967 0.410 0 0 Mfap5
## Igfbp6 0 2.156417 0.820 0.269 0 0 Igfbp6
## Acta2 0 1.658171 0.808 0.282 0 0 Acta2
## Cd9 0 1.769068 0.771 0.250 0 0 Cd9
## to check the most positivelly different markers per cluster
top5PerCluster <- matrix(ncol = 7)
colnames(top5PerCluster) <- colnames(de_allClusters)
for (i in 0:7) {
top5PerCluster <- rbind(top5PerCluster, head(de_allClusters[which(de_allClusters$cluster == i), ], 5))
}
top5PerCluster <- top5PerCluster[-1, ]
top5PerCluster
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Emp3 0 1.5477527 0.882 0.276 0 0 Emp3
## Tagln 0 1.7617999 0.869 0.285 0 0 Tagln
## Mfap5 0 1.9832469 0.967 0.410 0 0 Mfap5
## Igfbp6 0 2.1564170 0.820 0.269 0 0 Igfbp6
## Acta2 0 1.6581714 0.808 0.282 0 0 Acta2
## A2m 0 2.1957915 0.645 0.228 0 1 A2m
## Folh1 0 2.7411588 0.476 0.078 0 1 Folh1
## Ifi207 0 1.8621548 0.760 0.366 0 1 Ifi207
## Scara5 0 3.1733128 0.433 0.052 0 1 Scara5
## Fzd1 0 1.6533596 0.613 0.244 0 1 Fzd1
## Mki67 0 4.9253267 0.935 0.071 0 2 Mki67
## Pclaf 0 4.7515750 0.940 0.078 0 2 Pclaf
## Birc5 0 4.6546210 0.931 0.078 0 2 Birc5
## Tpx2 0 4.6761468 0.900 0.080 0 2 Tpx2
## Top2a 0 4.6970243 0.942 0.125 0 2 Top2a
## Car3 0 0.4251338 0.921 0.188 0 3 Car3
## A2m1 0 1.5737209 0.905 0.200 0 3 A2m
## Cd361 0 1.1656463 0.958 0.255 0 3 Cd36
## Cdo1 0 1.2164844 0.916 0.240 0 3 Cdo1
## Pnpla2 0 1.3997130 0.936 0.293 0 3 Pnpla2
## Lipe1 0 4.1800830 0.947 0.158 0 4 Lipe
## Plin11 0 3.9131311 0.933 0.162 0 4 Plin1
## Slc36a21 0 4.4061239 0.839 0.071 0 4 Slc36a2
## Arxes12 0 3.8682761 0.886 0.129 0 4 Arxes1
## Pcx1 0 3.7818126 0.891 0.149 0 4 Pcx
## Pdgfb 0 6.1167040 0.192 0.005 0 5 Pdgfb
## Cdh5 0 7.9942350 0.176 0.007 0 5 Cdh5
## Upp1 0 6.7224151 0.171 0.003 0 5 Upp1
## Pecam1 0 8.3300366 0.171 0.003 0 5 Pecam1
## Cldn5 0 9.4795361 0.157 0.001 0 5 Cldn5
## Tyrobp 0 11.2962603 0.996 0.006 0 6 Tyrobp
## C1qc 0 11.4420963 0.996 0.007 0 6 C1qc
## Fcer1g 0 11.0250753 1.000 0.012 0 6 Fcer1g
## Trem2 0 10.4617098 0.993 0.006 0 6 Trem2
## Ctss 0 10.9513900 0.991 0.005 0 6 Ctss
## Lrg11 0 5.2286151 0.975 0.169 0 7 Lrg1
## Wfdc211 0 7.6023043 0.835 0.035 0 7 Wfdc21
## Plin41 0 3.7349377 0.897 0.115 0 7 Plin4
## Slc5a31 0 4.2728744 0.888 0.160 0 7 Slc5a3
## Ccl91 0 5.8271171 0.757 0.063 0 7 Ccl9
DoHeatmap(adp_pp_mks, features = top5PerCluster$gene, slot = "scale.data")

## visualization of specific features
fig1 <- DimPlot(adp_pp_mks, group.by = "time_point")
fig2 <- FeaturePlot(adp_pp_mks, features = "Acta2", order = T)
fig3 <- FeaturePlot(adp_pp_mks, features = "Cd36", order = T)
fig1 / (fig2 | fig3)

## is possible to join all cells as a seample and do a pseudobulk analisys
pseudo_adp <- AggregateExpression(adp_pp_mks, assays = "SCT", return.seurat = T, group.by = c("orig.ident", "time_point", "condition", "condition_tp"))
head(pseudo_adp@assays$SCT$counts)
## 6 x 8 sparse Matrix of class "dgCMatrix"
## D10_Day 0_DKO_DKO Day 0 D16_Day 6_DKO_DKO Day 6 D20_Day 0_DKO_DKO Day 0
## Xkr4 188 30 146
## Gm19938 202 66 135
## Sox17 52 . 30
## Mrpl15 5878 3159 4103
## Lypla1 1475 1216 1267
## Tcea1 9416 3132 5955
## D26_Day 6_DKO_DKO Day 6 W10_Day 0_WT_WT Day 0 W16_Day 6_WT_WT Day 6
## Xkr4 26 231 31
## Gm19938 61 179 51
## Sox17 . 60 .
## Mrpl15 3909 5546 2603
## Lypla1 1598 1738 1100
## Tcea1 3769 9304 2924
## W20_Day 0_WT_WT Day 0 W26_Day 6_WT_WT Day 6
## Xkr4 175 32
## Gm19938 115 62
## Sox17 25 .
## Mrpl15 4609 3309
## Lypla1 1272 1161
## Tcea1 6589 3580
pseudo_adp@meta.data
## orig.ident time_point condition
## D10_Day 0_DKO_DKO Day 0 D10_Day 0_DKO_DKO Day 0 Day 0 DKO
## D16_Day 6_DKO_DKO Day 6 D16_Day 6_DKO_DKO Day 6 Day 6 DKO
## D20_Day 0_DKO_DKO Day 0 D20_Day 0_DKO_DKO Day 0 Day 0 DKO
## D26_Day 6_DKO_DKO Day 6 D26_Day 6_DKO_DKO Day 6 Day 6 DKO
## W10_Day 0_WT_WT Day 0 W10_Day 0_WT_WT Day 0 Day 0 WT
## W16_Day 6_WT_WT Day 6 W16_Day 6_WT_WT Day 6 Day 6 WT
## W20_Day 0_WT_WT Day 0 W20_Day 0_WT_WT Day 0 Day 0 WT
## W26_Day 6_WT_WT Day 6 W26_Day 6_WT_WT Day 6 Day 6 WT
## condition_tp
## D10_Day 0_DKO_DKO Day 0 DKO Day 0
## D16_Day 6_DKO_DKO Day 6 DKO Day 6
## D20_Day 0_DKO_DKO Day 0 DKO Day 0
## D26_Day 6_DKO_DKO Day 6 DKO Day 6
## W10_Day 0_WT_WT Day 0 WT Day 0
## W16_Day 6_WT_WT Day 6 WT Day 6
## W20_Day 0_WT_WT Day 0 WT Day 0
## W26_Day 6_WT_WT Day 6 WT Day 6
glimpse(pseudo_adp)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
## ..@ assays :List of 1
## .. ..$ SCT:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
## ..@ meta.data :'data.frame': 8 obs. of 4 variables:
## .. ..$ orig.ident : chr [1:8] "D10_Day 0_DKO_DKO Day 0" "D16_Day 6_DKO_DKO Day 6" "D20_Day 0_DKO_DKO Day 0" "D26_Day 6_DKO_DKO Day 6" ...
## .. ..$ time_point : chr [1:8] "Day 0" "Day 6" "Day 0" "Day 6" ...
## .. ..$ condition : chr [1:8] "DKO" "DKO" "DKO" "DKO" ...
## .. ..$ condition_tp: chr [1:8] "DKO Day 0" "DKO Day 6" "DKO Day 0" "DKO Day 6" ...
## ..@ active.assay: chr "SCT"
## ..@ active.ident: Factor w/ 8 levels "D10_Day 0_DKO_DKO Day 0",..: 1 2 3 4 5 6 7 8
## .. ..- attr(*, "names")= chr [1:8] "D10_Day 0_DKO_DKO Day 0" "D16_Day 6_DKO_DKO Day 6" "D20_Day 0_DKO_DKO Day 0" "D26_Day 6_DKO_DKO Day 6" ...
## ..@ graphs : list()
## ..@ neighbors : list()
## ..@ reductions : list()
## ..@ images : list()
## ..@ project.name: chr "Aggregate"
## ..@ misc : list()
## ..@ version :Classes 'package_version', 'numeric_version' hidden list of 1
## .. ..$ : int [1:3] 5 1 0
## ..@ commands :List of 1
## .. ..$ ScaleData.SCT:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
## ..@ tools : list()
# just to clean up the look a little bit
pseudo_adp <- RenameCells(pseudo_adp, new.names = gsub("_.*", "", pseudo_adp$orig.ident))
pseudo_adp$orig.ident <- gsub("_.*", "", pseudo_adp$orig.ident)
head(pseudo_adp@assays$SCT$counts)
## 6 x 8 sparse Matrix of class "dgCMatrix"
D10 D16 D20 D26 W10 W16 W20 W26
Xkr4 188 30 146 26 231 31 175 32
Gm19938 202 66 135 61 179 51 115 62
Sox17 52 . 30 . 60 . 25 .
Mrpl15 5878 3159 4103 3909 5546 2603 4609 3309
Lypla1 1475 1216 1267 1598 1738 1100 1272 1161
Tcea1 9416 3132 5955 3769 9304 2924 6589 3580
pseudo_adp@meta.data
## orig.ident time_point condition condition_tp
## D10 D10 Day 0 DKO DKO Day 0
## D16 D16 Day 6 DKO DKO Day 6
## D20 D20 Day 0 DKO DKO Day 0
## D26 D26 Day 6 DKO DKO Day 6
## W10 W10 Day 0 WT WT Day 0
## W16 W16 Day 6 WT WT Day 6
## W20 W20 Day 0 WT WT Day 0
## W26 W26 Day 6 WT WT Day 6
## performin bulk DE
Idents(pseudo_adp) <- "time_point"
## DESeq2 if not istalled: BiocManager::install("DESeq2")
bulk_adp_de <- FindMarkers(pseudo_adp, ident.1 = "Day 0", ident.2 = "Day 6", test.use = "DESeq2")
head(bulk_adp_de)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Nabp1 0 -1.960008 1 1 0
## Gpr176 0 1.624052 1 1 0
## Myl9 0 4.101615 1 1 0
## Pltp 0 1.256316 1 1 0
## Tm4sf1 0 3.128734 1 1 0
## Etv3 0 -1.190553 1 1 0
# comparing how many differentially expressed genes between SC and bulk analisys in function of the conditions
scDE.genes <- rownames(Day0_Day6_DE)[which(Day0_Day6_DE$p_val_adj < 0.05)]
bulkDE.genes <- rownames(bulk_adp_de)[which(bulk_adp_de$p_val_adj < 0.05)]
length(scDE.genes)
## [1] 10342
length(bulkDE.genes)
## [1] 8526
## heck the common features between sc and bulk
length(intersect(scDE.genes, bulkDE.genes))
## [1] 6619
head(intersect(scDE.genes, bulkDE.genes), 100)
## [1] "Emp3" "Tagln" "Acta2" "Tpm2" "Cd36" "A2m"
## [7] "Cd9" "Rbp4" "Mfap5" "Igfbp6" "Myl9" "Anxa3"
## [13] "Timp2" "Car3" "Cyba" "Cnn2" "Thy1" "Tpm1"
## [19] "Lbh" "Actn1" "Cdo1" "Scd1" "Ccn4" "Rhoc"
## [25] "Pdlim2" "Ccn5" "Timp3" "Ccn2" "Selenom" "Basp1"
## [31] "Clic1" "Pnpla2" "Phldb2" "Col5a3" "Phgdh" "F3"
## [37] "Arl4a" "mt-Nd6" "Thbs1" "Crip1" "Arxes2" "Mfap4"
## [43] "Lrrc32" "Map1b" "Ddah2" "Cnn3" "Fhl2" "Ahnak2"
## [49] "Prelp" "Npdc1" "Rbp1" "Mmp23" "Vat1" "Cyb5r1"
## [55] "Tuba1a" "Nenf" "Serpinf1" "Bst2" "Hes1" "Arid5b"
## [61] "Atf5" "Myof" "Lgalsl" "Fabp4" "Adamts5" "Tbcb"
## [67] "Ctsk" "Flna" "Prg4" "Btg1" "Adm" "Acsl1"
## [73] "Eva1b" "Asns" "S100a4" "Abracl" "Emp2" "Ywhah"
## [79] "Plk2" "Plec" "Mxra7" "Tm4sf1" "Metrnl" "Tgfb1i1"
## [85] "Nupr1" "Glul" "Msn" "Vps29" "Ccl7" "Axl"
## [91] "Sri" "Pls3" "Cygb" "Gpx1" "Ifi207" "Gpx8"
## [97] "Prdx4" "Serpina3n" "Ttc28" "Adipoq"
## to chech spefic features
bulk_adp_de[c("Acta2", "Cd36"), ]
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Acta2 0.000000e+00 5.482773 1 1 0.000000e+00
## Cd36 2.413872e-93 -4.063080 1 1 4.611462e-89
## visualize the DE genes
Idents(adp_pp) <- "SCT_snn_res.0.1"
DotPlot(adp_pp, features = unique(top5PerCluster$gene), dot.scale = 3) + coord_flip()

# violine plot as alternative visualization
Idents(adp_pp) <- "time_point"
VlnPlot(adp_pp, features = c("Acta2", "Cd36"), alpha = 0.1)

## anotate the differential genes
markers <- c("Mmp3", "Mki67", "Fabp4", "Scd1", "Ucp1", "Ppargc1a", "Elovl3", "Cidea")
Idents(adp_pp) <- "SCT_snn_res.0.1"
avgExp <- AverageExpression(adp_pp, markers, assay = "SCT")$SCT
avgExp
## 8 x 8 sparse Matrix of class "dgCMatrix"
## g0 g1 g2 g3 g4
## Mmp3 1.008188e+01 2.41986570 2.056792707 0.63925352 0.1550422
## Mki67 6.781151e-02 0.09261371 5.246151547 0.38178080 0.1257219
## Fabp4 3.001700e+00 52.90814646 24.516514721 159.78317254 452.0910706
## Scd1 6.420701e-01 7.15912834 2.371244956 26.48774316 80.5135495
## Ucp1 . 0.00836184 0.003138544 0.01534082 0.1332741
## Ppargc1a 3.868245e-03 0.04003547 0.012703632 0.10975803 0.8187472
## Elovl3 6.447075e-04 0.06461422 0.028097444 0.17744741 1.3083074
## Cidea 1.406635e-03 0.04408970 0.013301450 0.13000158 0.7250111
## g5 g6 g7
## Mmp3 2.413687436 1.448833034 1.16201117
## Mki67 0.373850868 1.935368043 0.24022346
## Fabp4 14.020429009 2.181328546 132.08379888
## Scd1 1.168539326 0.210053860 68.46648045
## Ucp1 . . .
## Ppargc1a 0.008171604 . 0.05027933
## Elovl3 0.007150153 . .
## Cidea 0.008171604 0.001795332 .
DimPlot(adp_pp, label = T)

FeaturePlot(adp_pp, features = markers, ncol = 3, order = T)

## after finding the markers associated with a particular cluster we can annotate for the cell type.
adipocyte <- vector(length = ncol(adp_pp))
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(0, 5))] <- "Preadipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(2, 6))] <- "Proliferating cells"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(1, 3))] <- "Differentiating beige adipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(4))] <- "Differentiated beige adipocytes"
adipocyte[which(adp_pp$SCT_snn_res.0.1 %in% c(7))] <- "Unclassified"
adp_pp$adipocyte <- adipocyte
f1 <- DimPlot(adp_pp, group.by = "SCT_snn_res.0.1", label = T) + NoLegend()
f2 <- DimPlot(adp_pp, group.by = "time_point") + NoLegend()
f3 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
(f1 / f2 | f3)

## SingleR annotation. This method annotate each cell in the dataset against a reference dataset.
library(SingleR) # for cell type annotation; Bioconductor
library(celldex) # for cell type annotation reference; Bioconductor
library(MAST) # for differential expression; Bioconductor
adp.sce <- as.SingleCellExperiment(adp_pp, assay = "SCT") # This selects *only* the SCT assay
mouseRNASeq <- celldex::MouseRNAseqData()
head(mouseRNASeq)
## class: SummarizedExperiment
## dim: 6 358
## metadata(0):
## assays(1): logcounts
## rownames(6): Xkr4 Rp1 ... Lypla1 Tcea1
## rowData names(0):
## colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
## SRR1044044Aligned
## colData names(3): label.main label.fine label.ont
table(mouseRNASeq$label.main)
##
## Adipocytes Astrocytes B cells Cardiomyocytes
## 13 27 5 8
## Dendritic cells Endothelial cells Epithelial cells Erythrocytes
## 2 12 2 3
## Fibroblasts Granulocytes Hepatocytes Macrophages
## 45 15 4 32
## Microglia Monocytes Neurons NK cells
## 72 6 64 18
## Oligodendrocytes T cells
## 12 18
table(mouseRNASeq$label.fine)
##
## Adipocytes aNSCs Astrocytes
## 13 8 24
## Astrocytes activated B cells Cardiomyocytes
## 3 5 8
## Dendritic cells Endothelial cells Ependymal
## 2 12 2
## Erythrocytes Fibroblasts Fibroblasts activated
## 3 27 9
## Fibroblasts senescent Granulocytes Hepatocytes
## 9 15 4
## Macrophages Macrophages activated Microglia
## 26 6 56
## Microglia activated Monocytes Neurons
## 16 6 39
## Neurons activated NK cells NPCs
## 4 18 11
## Oligodendrocytes OPCs qNSCs
## 10 2 2
## T cells
## 18
annot <- SingleR::SingleR(test = adp.sce, ref = mouseRNASeq, labels = mouseRNASeq$label.main)
head(annot)
## DataFrame with 6 rows and 4 columns
## scores labels delta.next
## <matrix> <character> <numeric>
## D10_AAACCCAAGATGCTTC-1 0.523246:0.275495:0.210562:... Adipocytes 0.2110472
## D10_AAACCCAAGCTCTTCC-1 0.499957:0.250442:0.226092:... Fibroblasts 0.1624889
## D10_AAACCCAAGTCATCGT-1 0.474176:0.213152:0.189308:... Fibroblasts 0.3422910
## D10_AAACCCAGTAGCGTCC-1 0.449344:0.246099:0.227259:... Fibroblasts 0.0504144
## D10_AAACCCAGTGCACGCT-1 0.399770:0.179617:0.207093:... Fibroblasts 0.0520780
## D10_AAACCCATCCACTGAA-1 0.498567:0.249653:0.189627:... Fibroblasts 0.0797964
## pruned.labels
## <character>
## D10_AAACCCAAGATGCTTC-1 Adipocytes
## D10_AAACCCAAGCTCTTCC-1 Fibroblasts
## D10_AAACCCAAGTCATCGT-1 Fibroblasts
## D10_AAACCCAGTAGCGTCC-1 Fibroblasts
## D10_AAACCCAGTGCACGCT-1 Fibroblasts
## D10_AAACCCATCCACTGAA-1 Fibroblasts
# if a label is to weak during SingleR the cell is taged as NA. Now we add the labels determine to the metadata of the Seurat object
table(annot$pruned.labels, useNA = "ifany") # useNA can be used turned on in the `table` function
##
## Adipocytes Endothelial cells Fibroblasts Macrophages
## 13406 103 27696 478
## <NA>
## 431
adp_pp$mouseRNASeq.main <- annot$pruned.labels
## let's visualize the final result
annotFig1 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
annotFig2 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main", label = T)
annotFig1 | annotFig2

## annotation by cluster
Idents(adp_pp) <- "SCT_snn_res.0.1" # Assign clusters as the identities
avgExp <- AverageExpression(adp_pp, assays = "SCT")$SCT # Run AverageExpression on the SCT assay and return only SCT
clustAnnot <- SingleR::SingleR(test = avgExp, ref = mouseRNASeq, labels = mouseRNASeq$label.main) # Run SingleR on the averaged expression matrix
clustAnnot
## DataFrame with 8 rows and 4 columns
## scores labels delta.next pruned.labels
## <matrix> <character> <numeric> <character>
## g0 0.764947:0.405104:0.313582:... Fibroblasts 0.0900693 Fibroblasts
## g1 0.811652:0.425415:0.305177:... Fibroblasts 0.1090396 Fibroblasts
## g2 0.762817:0.391869:0.314699:... Fibroblasts 0.0699542 Fibroblasts
## g3 0.822446:0.419011:0.303251:... Adipocytes 0.0520561 Adipocytes
## g4 0.837119:0.421358:0.311282:... Adipocytes 0.1554873 Adipocytes
## g5 0.730079:0.366069:0.360429:... Fibroblasts 0.0609226 Fibroblasts
## g6 0.570732:0.211146:0.541797:... Macrophages 0.1006733 Macrophages
## g7 0.828817:0.388983:0.313862:... Adipocytes 0.0646115 Adipocytes
clustLabels <- as.vector(clustAnnot$pruned.labels) # retrieve only the cluster-derived annotations
names(clustLabels) <- c(0:7) # assign the cluster numbers as the annotations
clustLabels.vect <- clustLabels[match(adp_pp$SCT_snn_res.0.1, names(clustLabels))] # match the cluster identities per cell in the Seurat data to the cluster labels
names(clustLabels.vect) <- colnames(adp_pp) # ensure that the cluster identities are assigned the cell names
adp_pp$mouseRNASeq.main.clust <- clustLabels.vect # add the cluster annotations to the vector
clustAnnotFig1 <- DimPlot(adp_pp, group.by = "SCT_snn_res.0.1", label = T) + NoLegend()
clustAnnotFig2 <- DimPlot(adp_pp, group.by = "adipocyte", label = T) + NoLegend()
clustAnnotFig3 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main")
clustAnnotFig4 <- DimPlot(adp_pp, group.by = "mouseRNASeq.main.clust")
(clustAnnotFig1 | clustAnnotFig2) / (clustAnnotFig3 | clustAnnotFig4)
